Instructors: Shaun Harrigan, Katie Smith, Berry Boessenkool and Daniel Klotz
Organizer: Berry Boessenkool, PhD student at Potsdam University (Germany)
Contact: Questions and feedback are welcome via berry-b@gmx.de
These slides and all other course materials can be found at
github.com/brry/rhydro
Download the github course repository with all the materials including the datasets and presentation source code.
This is an R Markdown Notebook.
For discussions, please visit the Hydrology in R Facebook group.
Before running the code blocks below, we suggest to get package installation instructions by running:
source("https://raw.githubusercontent.com/brry/rhydro/master/checkpc.R")
Aim and contents of this workshop
We want to:
We can not:
We have prepared:
rmarkdown, R notebook)sf + leaflet + mapview + OSMscale)animation + extremeStat)airGR
Before we get started, please let us know your current R knowledge level by filling out the short survey at
bit.ly/knowR
Good coding practice, report generation (Rstudio, rmarkdown, R notebook)
Daniel Klotz
goals
Why I did not use:
equals
Whats great about R:
library(ggplot2)
test_data <- mpg
test_plot <- ggplot(test_data, aes(displ, hwy, colour = class)) +
geom_point()
test_plot
Why I decided to use R:
pipe
Previously:
aggregation_function <- function(x) {
round(mean(x),2)
}
mtcars_subset <- subset(mtcars,hp > 100)
mtcars_aggregated <- aggregate(. ~ cyl, data = mtcars_subset, FUN = aggregation_function)
car_data1 <- transform(mtcars_aggregated, kpl = mpg*0.4251)
print(car_data1)
Now:
library(magrittr)
car_data2 <-
mtcars %>%
subset(hp > 100) %>%
aggregate(. ~ cyl, data = ., FUN = . %>% mean %>% round(2)) %>%
transform(kpl = mpg %>% multiply_by(0.4251)) %>%
print
btw: You can integrate other programming languages with ease. Here an example from Yihui Xie for the use of Fortran in Rmarkdown:
Compile Code:
C Fortran test
subroutine fexp(n, x)
double precision x
C output
integer n, i
C input value
do 10 i=1,n
x=dexp(dcos(dsin(dble(float(i)))))
10 continue
return
endRun Code:
res = .Fortran("fexp", n=100000L, x=0)
str(res)Be happy with the result: > ## List of 2 > ## $ n: int 100000 > ## $ x: num 2.72
In Rstudio:
“Native” Formats:
Much more possible if you adress pandoc directly:
Information in the text can be automatically updated with the rest of the document: [
Using R as GIS (reading a rainfall shapefile + Kriging, sf + leaflet + mapview + OSMscale)
Berry Boessenkool
Reading shapefiles with maptools::readShapeSpatial and rgdal::readOGR is obsolete.
Instead, use sf::st_read. sf is on CRAN since oct 2016.
Main reaction when using sf: “Wow, that is fast!”
Download the shapefile or better: download the whole github course repository
rain <- sf::st_read("data/PrecBrandenburg/niederschlag.shp")
Central points of rainfall Thiessen polygons
centroids <- sf::st_centroid(rain)
centroids <- sf::st_coordinates(centroids)
centroids <- as.data.frame(centroids)
Static plot:
plot(rain[,1])
Static map:
prj <- sf::st_crs(rain)$proj4string
cent_ll <- OSMscale::projectPoints(Y,X, data=centroids, to=OSMscale::pll(), from=prj)
#map_static <- OSMscale::pointsMap(y,x, cent_ll, fx=0.08, type="maptoolkit-topo", zoom=6)
#save(map_static, file="data/map_static.Rdata")
load("data/map_static.Rdata")
OSMscale::pointsMap(y,x, cent_ll, map=map_static)
Interactive map:
library(leaflet)
cent_ll$info <- paste0(sample(letters,nrow(cent_ll),TRUE), ", ", round(cent_ll$x,2),
", ", round(cent_ll$y,2))
leaflet(cent_ll) %>% addTiles() %>% addCircleMarkers(lng=~x, lat=~y, popup=~info)
Interactive map of shapefile:
# make sure to have installed the development version of mapview:
# devtools::install_github("environmentalinformatics-marburg/mapview", ref = "develop")
library(berryFunctions) # classify, seqPal
col <- seqPal(n=100, colors=c("red","yellow","blue"))[classify(rain$P1)$index]
mapview::mapview(rain, col.regions=col)
Plot original points colored by third dimension:
pcol <- colorRampPalette(c("red","yellow","blue"))(50)
x <- centroids$X # use cent_ll$x for projected data
y <- centroids$Y
berryFunctions::colPoints(x, y, rain$P1, add=FALSE, col=pcol)
Calculate the variogram and fit a semivariance curve
library(geoR)
geoprec <- as.geodata(cbind(x,y,rain$P1))
vario <- variog(geoprec, max.dist=130000) # other maxdist for lat-lon data
fit <- variofit(vario)
plot(vario)
lines(fit)
Determine a useful resolution (keep in mind that computing time rises exponentially with grid size)
# distance to closest other point:
d <- sapply(1:length(x), function(i)
min(berryFunctions::distance(x[i], y[i], x[-i], y[-i])) )
# for lat-long data use (2017-Apr only available in github version of OSMscale)
# d <- OSMscale::maxEarthDist(y,x, data=cent_ll, fun=min)
hist(d/1000, breaks=20, main="distance to closest gauge [km]")
mean(d/1000) # 8 km
Perform kriging on a grid with that resolution
res <- 1000 # 1 km, since stations are 8 km apart on average
grid <- expand.grid(seq(min(x),max(x),res),
seq(min(y),max(y),res))
krico <- krige.control(type.krige="OK", obj.model=fit)
#krobj <- krige.conv(geoprec, loc=grid, krige=krico)
#save(krobj, file="data/krobj.Rdata")
load("data/krobj.Rdata") # line above is too slow for recreation each time
Plot the interpolated values with image or an equivalent function (see Rclick 4.15) and add contour lines.
par(mar=c(0,3,0,3))
geoR:::image.kriging(krobj, col=pcol)
colPoints(x, y, rain$P1, col=pcol, legargs=list(horiz=F, title="Prec",y1=0.1,x1=0.9))
points(x,y)
plot(rain, col=NA, add=TRUE)
River discharge time-series visualisation and extreme value statistics (animation + extremeStat)
Berry Boessenkool
Datasets from http://nrfa.ceh.ac.uk/data/station/meanflow/39072
Download discharge1, discharge2_xxx, discharge3_xxx, or better: download the whole github course repository
Read and transform data
Q <- read.table("data/discharge39072.csv", skip=19, header=TRUE, sep=",", fill=TRUE)[,1:2]
colnames(Q) <- c("date","Royal_Windsor_Park")
Q$date <- as.Date(Q$date, format="%Y-%m-%d")
Examine data
head(Q)
str(Q)
Simple time series plot
plot(Q, type="l", col="blue")
Publication-ready graphics
png("DischargeVis.png", width=20, height=10, units="cm", res=500)
#pdf("DischargeVis.pdf", width=20/2.5, height=10/2.5) # vector graph
par(mar=c(3.5,3.5,2.5,0.2), mgp=c(2.3,0.7,0), las=1)
plot(Q, type="l", col="blue", main="NRFA: Thames\nRoyal Windsor Park",
xlab="Date", ylab="Discharge [m\U{00B3}/s]")
dev.off()
Annual maxima, German hydrological year split at Oct 31
Q$hydyear <- as.numeric(format(Q$date+61, "%Y"))
annmax <- tapply(Q$Royal_Windsor_Park, Q$hydyear, max, na.rm=TRUE)
annmax <- annmax[-1]
hydyear <- as.numeric(names(annmax))
plot(hydyear, annmax, type="o", las=1)
library(extremeStat)
dlf <- distLfit(annmax)
plotLfit(dlf)
plotLfit(dlf, cdf=TRUE)
dle <- distLextreme(dlf=dlf, RPs=c(5,10,50,100), gpd=FALSE)
plotLextreme(dle)
Logarithmic plot with many fitted distribution functions
plotLextreme(dle, nbest=16, log=TRUE)
Return values (discharge estimates for given return periods)
dlf$returnlev
In reality, please use non-stationary EVS!
Uncertainty band for Wakeby distribution fit estimate
dle_boot <- distLexBoot(dle, n=10, nbest=1)
plotLexBoot(dle_boot, distcol="green")
More details in the package vignette
vignette("extremeStat")
Read data from several discharge stations
if(FALSE){
Q2 <- read.table("data/discharge_____.csv", skip=19, header=TRUE, sep=",", fill=TRUE)[,1:2]
colnames(Q2) <- c("date","dummy1"); Q2$date <- as.Date(Q2$date, format="%Y-%m-%d")
Q3 <- read.table("data/discharge_____.csv", skip=19, header=TRUE, sep=",", fill=TRUE)[,1:2]
colnames(Q3) <- c("date","dummy2"); Q3$date <- as.Date(Q3$date, format="%Y-%m-%d")
}
Q2 <- Q ; Q2[,2] <- Q2[,2] + 100 ; Q2 <- Q2[-(1:100),]
Q3 <- Q ; Q3[,2] <- Q3[,2] - 50 ; Q3 <- head(Q3, -365*3)
colnames(Q2)[2] <- "dummy1"; colnames(Q3)[2] <- "dummy2"
Merge datasets
dis <- Reduce(function(...) merge(..., all=T), list(Q,Q2,Q3))
Code to generate one movie slice
library(berryFunctions) # for lim0, monthAxis, textField, etc
scene <- function(i, vlcnote=TRUE, cex=1.2)
{
sel <- 0:120
dis2 <- dis[i + sel, ]
stat <- c("Royal_Windsor_Park", "dummy1", "dummy2")
col <- c("red", "blue", "orange")
names(col) <- stat
plot(dis2$date,dis2$Royal_Windsor_Park, type="n", ylim=lim0(500), las=1,
xaxt="n", yaxt="n", cex.lab=cex, xlab="",
ylab="Discharge [m\U{00B3}/s]", xaxs="i")
axis(2, cex.axis=cex, las=1)
Sys.setlocale("LC_TIME", "C")
monthAxis(midmonth=TRUE, format="%b\n%Y", cex=cex, mgp=c(3,3.5,0))
abline(h=1:6*100, v=dis2$date[format(dis2$date,"%d")=="01"], col=8)
for(s in stat) lines(dis2$date, dis2[,s], lwd=4, col=col[s])
xi <- seqR(sel,len=length(stat)+2)[-1]
xi <- head(xi, -1)
textField(dis2$date[xi], diag(as.matrix(dis2[xi,stat])), stat, cex=cex, col=col)
box()
if(vlcnote) mtext("VLC: 'e': single frame forward\n'SHIFT+LEFT': few seconds back",
side=3, line=-9, outer=TRUE, adj=0.95, cex=cex)
}
par(mar=c(5,5,0.5,0.5), mgp=c(3,0.7,0))
scene(150)
Actual movie
library(animation)
saveVideo({par(mar=c(6,8,1,1), mgp=c(5.5,0.7,0))
dummy <- pbsapply(seq(0, by=3, len=50), scene, cex=2); rm(dummy)
}, video.name="Qmovie.mp4", interval=0.1, ffmpeg="/usr/bin/ffmpeg",
ani.width=7*200, ani.height=5*200)
Exploratory Data Analysis including flow duration curve and trend analysis on time-series
Shaun Harrigan
Please give us feedback at bit.ly/feedbackR
For discussions, please use the Hydrology in R Facebook group.